from __future__ import division
import numpy as np
import math
import sys


# Calculate the entropy of label array y
def calculate_entropy(y):
    """Return the Shannon entropy, in bits, of the label collection ``y``.

    Args:
        y: Array-like of labels (a ``numpy.ndarray``, list, tuple, ...).

    Returns:
        ``-sum(p * log2(p))`` over the distinct labels in ``y``, where ``p``
        is a label's occurrence count divided by ``len(y)``. Returns 0 when
        ``y`` is empty.
    """
    # Coerce so plain Python sequences work as well as ndarrays.
    y = np.asarray(y)
    n_samples = len(y)
    if n_samples == 0:
        return 0
    # A single pass yields every label's count, instead of building one
    # boolean mask over y per distinct label.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / float(n_samples)
    # Subtract from 0.0 (rather than negate) so a single-label input gives
    # 0.0 and not -0.0.
    return 0.0 - float(np.sum(probabilities * np.log2(probabilities)))


# Returns the mean squared error between y_true and y_pred
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences ``y_true - y_pred``.

    Args:
        y_true: Reference values.
        y_pred: Predicted values; must support ``y_true - y_pred``.

    Returns:
        The average, over all elements, of the squared residuals.
    """
    residuals = y_true - y_pred
    squared_residuals = np.power(residuals, 2)
    return np.mean(squared_residuals)


# Return the variance of the features in dataset X
def calculate_variance(X):
    """Return the variance of each feature (column) of dataset ``X``.

    The sum of squared deviations from the column mean is divided by the
    number of samples (``n``, not ``n - 1``).

    Args:
        X: Array-like whose first axis indexes samples. A 2-D input of shape
            (n_samples, n_features) gives one variance per feature; a 1-D
            input is treated as a single feature.

    Returns:
        Array of shape (n_features,) for 2-D input, or a scalar for 1-D
        input.

    Raises:
        ZeroDivisionError: If ``X`` has no samples.
    """
    X = np.asarray(X)
    n_samples = np.shape(X)[0]
    # Broadcasting subtracts the per-feature mean from every row.
    centered = X - X.mean(axis=0)
    # Only the diagonal of centered.T.dot(centered) is needed, which is the
    # per-column sum of squares; summing directly avoids building the full
    # (n_features, n_features) product and also works for 1-D input.
    variance = (1 / n_samples) * np.sum(centered ** 2, axis=0)

    return variance


# Calculate the standard deviations of the features in dataset X
def calculate_std_dev(X):
    """Return the standard deviation of each feature of dataset ``X``.

    Computed as the element-wise square root of ``calculate_variance(X)``.
    """
    return np.sqrt(calculate_variance(X))


# Calculate the distance between two vectors
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between vectors ``x1`` and ``x2``.

    Components are paired by index over ``range(len(x1))``, so ``x2`` must
    be indexable at every position of ``x1``.
    """
    squared_sum = sum(
        pow(x1[idx] - x2[idx], 2) for idx in range(len(x1))
    )
    return math.sqrt(squared_sum)


# Compare y_true to y_pred and return the accuracy
def accuracy_score(y_true, y_pred):
    """Return the fraction of samples whose prediction matches the truth.

    Args:
        y_true: Indexable sequence of reference labels. Each entry may be a
            scalar or a vector (e.g. a one-hot row).
        y_pred: Indexable sequence of predicted labels, indexed in step
            with ``y_true``.

    Returns:
        Number of matching samples divided by ``len(y_true)``.

    Raises:
        ZeroDivisionError: If ``y_true`` is empty.
    """
    n_samples = len(y_true)
    # np.array_equal collapses the comparison to a single bool per sample.
    # The previous `diff == np.zeros(...)` test produced an array for vector
    # labels, whose truth value is ambiguous, and needed labels that support
    # subtraction.
    correct = sum(
        1 for i in range(n_samples) if np.array_equal(y_true[i], y_pred[i])
    )
    return correct / n_samples


# Calculate the covariance matrix for the dataset X
def calculate_covariance_matrix(X, Y=None):
    """Return the sample covariance matrix between the columns of X and Y.

    Args:
        X: Array-like of shape (n_samples, n_features_x).
        Y: Optional array-like of shape (n_samples, n_features_y). When
            omitted, the covariance of ``X`` with itself is returned.

    Returns:
        Float array of shape (n_features_x, n_features_y), normalised by
        ``n_samples - 1``.

    Raises:
        ZeroDivisionError: If ``X`` contains exactly one sample.
    """
    X = np.asarray(X)
    # Compare against None explicitly: `not Y` raises ValueError for any
    # array with more than one element.
    Y = X if Y is None else np.asarray(Y)
    n_samples = np.shape(X)[0]
    # Broadcasting subtracts each column's mean from every row.
    X_centered = X - X.mean(axis=0)
    Y_centered = Y - Y.mean(axis=0)
    covariance_matrix = (1 / (n_samples - 1)) * X_centered.T.dot(Y_centered)

    return np.array(covariance_matrix, dtype=float)


# Calculate the correlation matrix for the dataset X
def calculate_correlation_matrix(X, Y=None):
    """Return the correlation matrix between the columns of X and Y.

    Each covariance entry is divided by the product of the corresponding
    feature standard deviations.

    Args:
        X: Array of shape (n_samples, n_features_x).
        Y: Optional array of shape (n_samples, n_features_y). When omitted,
            the correlation of ``X`` with itself is returned.

    Returns:
        Float array of shape (n_features_x, n_features_y).
    """
    # Forward Y exactly as received (possibly None):
    # calculate_covariance_matrix applies the same "default to X" rule.
    covariance = calculate_covariance_matrix(X, Y)
    # Compare against None explicitly: `not Y` raises ValueError for any
    # array with more than one element.
    if Y is None:
        Y = X
    # Column vectors, so that std_dev_X . std_dev_Y^T is the outer product
    # of the per-feature standard deviations.
    std_dev_X = np.expand_dims(calculate_std_dev(X), 1)
    std_dev_Y = np.expand_dims(calculate_std_dev(Y), 1)
    # NOTE(review): calculate_covariance_matrix normalises by n_samples - 1
    # while calculate_variance normalises by n_samples, so the diagonal of
    # corr(X, X) comes out as n / (n - 1) rather than exactly 1. Confirm
    # whether that is intended.
    correlation_matrix = np.divide(covariance, std_dev_X.dot(std_dev_Y.T))

    return np.array(correlation_matrix, dtype=float)
